/*
Paper title: "Impact of Juntos conditional cash transfer program on nutritional and cognitive outcomes in Peru: ///
Comparison between younger and older initial exposure"
Authors: Alan Sánchez, Guido Melendez, Jere Behrman
Code written by: Alan Sánchez & Guido Melendez
Note: this analysis uses datafile "index_sibR1R4", which contains merged data from different rounds of the Young Lives Study in Peru.
*/

clear all
set matsize 5000
set memory 4000m
set maxvar 20000
set more off

* insert directory here
loc ruta   "F:\JUNTOS\_1 files"
loc rutaf  "F:\JUNTOS\_2 results"
loc rutagr "F:\JUNTOS\_3 graphics"

***************************************************************************
*                                                                         *
*                                                                         *
*              			 Table B.4 of APPENDIX B                          *
*                                                                         *
*                                                                         *
***************************************************************************

/* This do-file reports main results of table B.4 */

cd "`ruta'"
use index_sibR1R4, clear

***************************************************************************
*                                                                         *
*                                                                         *
*               I. DEFINIFION OF ANALYTICAL SAMPLE                        *
*                                                                         *
*                                                                         *
***************************************************************************

*** Only families with paired-siblings are considered for the analisys
duplicates tag CHILDID,generate(duplicate)
tab duplicate
drop if duplicate==0

/*** There are 62 families where the index child has 2 siblings instead of 1. We drop one of them, as follows:
In 55 families the second siblings is older than the index children. In this case we retain the younger sibling.
In the other 7 cases we chose one sibling at random.
*/
replace dup=1 if CHILDID=="PE161080" | CHILDID=="PE201100"
drop if CHILDID=="PE201100" & ID==16
drop if CHILDID=="PE171035" & ID==5
drop if CHILDID=="PE031085" & ID==17
drop if CHILDID=="PE091051" & ID==18
drop if CHILDID=="PE161080" & ID==4
drop if CHILDID=="PE201100"	& ID==16
drop if CHILDID=="PE091078"	& ID==6
drop if ID!=0 & duplicate==2 & age_months_78==.
drop duplicate

***************************************************************************
*                                                                         *
*                                                                         *
*              II. CREATION OF KEY VARIABLES (part 1)                     *
*                                                                         *
*                                                                         *
***************************************************************************

* Create household ID as numeric variable
sort CHILDID
egen hh=group(CHILDID)

* Create age in years 
gen age_45=int(age_months_45/12)
replace age_45=4 if age_45==3
replace age_45=6 if age_45==7

gen age_78=int(age_months_78/12)
replace age_78=7 if age_78==5
replace age_78=10 if age_78==11

* Standardizing PPVT scores by age in years (baseline)
egen std1=std(PPVT_45) if age_months_45<42 & PPVT_45!=.
egen std2=std(PPVT_45) if age_months_45>=43 & age_months_45<=48 & PPVT_45!=.
egen std3=std(PPVT_45) if age_months_45>=49 & age_months_45<=54 & PPVT_45!=.
egen std4=std(PPVT_45) if age_months_45>=55 & age_months_45<=60 & PPVT_45!=.
egen std5=std(PPVT_45) if age_months_45>=61 & age_months_45<=66 & PPVT_45!=.
egen std6=std(PPVT_45) if age_months_45>=67 & age_months_45<=72 & PPVT_45!=.
egen std7=std(PPVT_45) if age_months_45>=73 & age_months_45<=79 & PPVT_45!=.
egen std8=std(PPVT_45) if age_months_45>=80 & PPVT_45!=.

gen std_PPVT_45=.
replace std_PPVT_45=std1 if age_months_45>=37 & age_months_45<=42
replace std_PPVT_45=std2 if age_months_45>=43 & age_months_45<=48
replace std_PPVT_45=std3 if age_months_45>=49 & age_months_45<=54
replace std_PPVT_45=std4 if age_months_45>=55 & age_months_45<=60
replace std_PPVT_45=std5 if age_months_45>=61 & age_months_45<=66
replace std_PPVT_45=std6 if age_months_45>=67 & age_months_45<=72
replace std_PPVT_45=std7 if age_months_45>=73 & age_months_45<=79
replace std_PPVT_45=std8 if age_months_45>=80 & PPVT_45!=. 

label var std_PPVT_45 "Standardized PPVT raw original score, 4-5 age"
drop std1 std2 std3 std4 std5 std6 std7 std8 

* Standardizing PPVT scores by age in years (follow-up)
egen std7=std(PPVT_78)  if age_months_78<78 & PPVT_78!=.
egen std8=std(PPVT_78)  if age_months_78>=79  & age_months_78<=84 & PPVT_78!=.
egen std9=std(PPVT_78)  if age_months_78>=85  & age_months_78<=90 & PPVT_78!=.
egen std10=std(PPVT_78) if age_months_78>=91  & age_months_78<=96 & PPVT_78!=.
egen std11=std(PPVT_78) if age_months_78>=97  & age_months_78<=102 & PPVT_78!=.
egen std12=std(PPVT_78) if age_months_78>=103 & age_months_78<=108 & PPVT_78!=.
egen std13=std(PPVT_78) if age_months_78>=109 & age_months_78<=114 & PPVT_78!=.
egen std14=std(PPVT_78) if age_months_78>=115 & age_months_78<=120 & PPVT_78!=. 
egen std15=std(PPVT_78) if age_months_78>=121 & age_months_78<=126 & PPVT_78!=. 
egen std16=std(PPVT_78) if age_months_78>=127 & PPVT_78!=.

gen std_PPVT_78=.
replace std_PPVT_78=std7  if age_months_78>=73  & age_months_78<=78
replace std_PPVT_78=std8  if age_months_78>=79  & age_months_78<=84
replace std_PPVT_78=std9  if age_months_78>=85  & age_months_78<=90
replace std_PPVT_78=std10 if age_months_78>=91  & age_months_78<=96
replace std_PPVT_78=std11 if age_months_78>=97  & age_months_78<=102
replace std_PPVT_78=std12 if age_months_78>=103 & age_months_78<=108
replace std_PPVT_78=std13 if age_months_78>=109 & age_months_78<=114
replace std_PPVT_78=std14 if age_months_78>=115 & age_months_78<=120 
replace std_PPVT_78=std15 if age_months_78>=121 & age_months_78<=126 
replace std_PPVT_78=std16 if age_months_78>=127  

label var std_PPVT_78 "Standardized PPVT raw original score, 7-8 age"
drop std7 std8 std9 std10 std11 std12 std13 std14 std15 std16 

* Severe stunting
gen estunted_45=0
replace estunted_45=1 if zhfa_45<-3
replace estunted_45=. if zhfa_45==.
label var estunted_45 "Child is severely stunted, 4-5 age"

gen estunted_78=0
replace estunted_78=1 if zhfa_78<-3 
replace estunted_78=. if zhfa_78==.
label var estunted_78 "Child is extremely stunted, 7-8 age"

* Variables associated to Juntos elegibility rule (round 2)

clonevar tipom2R2=tipom2_45 if sib==0
clonevar tipom3R2=tipom3_45 if sib==0
clonevar tipom4R2=tipom4_45 if sib==0
clonevar serv3R2=serv3_45 if sib==0
clonevar n_equipR2=n_equip_45 if sib==0
clonevar combustR2=combust_45 if sib==0
clonevar women_weducR2=women_weduc_45 if sib==0
clonevar edu_men614R2=edu_men614_45 if sib==0
clonevar edu_men617R2=edu_men617_45 if sib==0
clonevar age_caregiverR2=age_caregiver_45 if sib==0
clonevar HHSIZER2=HHSIZE_45 if sib==0

foreach x in altitudeR2 tipom2R2 tipom3R2 tipom4R2 serv3R2 n_equipR2 combustR2 women_weducR2 edu_men614R2 edu_men617R2 age_caregiverR2 HHSIZER2 hh_agricR2 hh_noactR2 {
replace `x'=0 if `x'==.
bys CHILDID: egen junk`x'=sum(`x')
replace `x'=junk`x'
drop junk`x'
}

* Diferences in HAZ, sibling

foreach y in std_PPVT zhfa estunted stunted {
gen dif`y'=`y'_78-`y'_45 
} 

label var difstd_PPVT "Change in standardized ppvt score"
label var difzhfa "Change in haz"
label var difstunted "Change in stunting"
label var difstunted "Change in stunting"

order CHILDID ID sib ybirth mbirth monthdob METH MUMED CAREED CETH mum_analf care_analf CLUSTIDR1 zhfaR1 ///
altitudeR1 tipom2R1 tipom3R1 tipom4R1 serv3R1 n_equipR1 ///
combustR1 women_weducR1 edu_men614R1 edu_men617R1 HHSIZER1 age_caregiverR1 hh_agricR1 hh_noactR1 ///
altitudeR2 tipom2R2 tipom3R2 tipom4R2 serv3R2 n_equipR2 ///
combustR2 women_weducR2 edu_men614R2 edu_men617R2 HHSIZER2 age_caregiverR2 hh_agricR2 hh_noactR2 female CLANG


***************************************************************************
*                                                                         *
*                                                                         *                                                                         *
*            III. RESHAPE OF DATABASE FOR PANEL DATA ANALYSIS             *
*                                                                         *
*                                                                         *
***************************************************************************

reshape long date_int_ year_ rural_ electricity_ water_ drain_ serv3_ floor_ roof_ wall_ combust_ n_equip_ hq_ ///
cd_ sv_ wi_ tipom1_ tipom2_ tipom3_ tipom4_ totalexp_rpc_ HHSIZE_ edu_men614_ edu_men617_ women_weduc_ ///
age_mother_ age_caregiver_ age_months_ zhfa_ stunted_ PPVT_ age_ std_PPVT_ estunted_ ATTNDSCHL_ GRADE_ ppvtlang_ ///
, i(CHILDID sib) j(round)

foreach x in date_int year rural electricity water drain serv3 floor roof wall combust n_equip hq ///
cd sv wi tipom1 tipom2 tipom3 tipom4 totalexp_rpc HHSIZE edu_men614 edu_men617 women_weduc ///
age_mother age_caregiver age_months zhfa stunted PPVT age std_PPVT estunted ATTNDSCHL GRADE ppvtlang ///
{
rename `x'_ `x'
}
 
label var date_int "Date of interview (in months)"
label var year "Year of interview"
label var rural "Area of residence is rural"
label var electricity "Dwelling has electricity"
label var water "Main source of drinking water is Piped into dwelling/yard/plot"
label var drain "Type of toilet facility is Flush toilet/septic tank in dwelling"
label var serv3 "Number of basic public services" 
label var floor "Main floor material is high quality material" 
label var roof "Main roof material is high quality material"
label var wall "Main wall material is high quality material"
label var combust "Main fuel for cooking is gas, electricity or kerosene"
label var n_equip "Number of assets household doesn't have"
label var hq "Housing queality index"
label var cd "Consumer durables index"
label var sv "Access to services index"
label var wi "Wealth index"
label var tipom1 "Type of housing quality: very low"
label var tipom2 "Type of housing quality: low"
label var tipom3 "Type of housing quality: medium"
label var tipom4 "Type of housing quality: high"
label var totalexp_rpc "Total real consumption per capita - base 2006"
label var HHSIZE "Household size"
label var edu_men614 "Percentage of children who are in full-time education (6-17)"
label var edu_men617 "Percentage of children who are in full-time education (6-14)"
label var women_weduc "Percentage of adult women without formal education"
label var age_mother "Age of mother (in years)"
label var age_caregiver "Age of mother (in years)" 
label var age_months "Age of child in months"
label var zhfa "Heigh-for-age z score"
label var stunted "Child is stunted"
label var PPVT "PPVT original raw score"
label var age "Age of child (in years)"
label var std_PPVT "Standardized PPVT original raw score"
label var estunted "Child is extremely stunted"
label var ATTNDSCHL "Are you currenty in full-time education?"
label var GRADE "Highest grade attainment or current school grade"
label var ppvtlang "Language used by child during PPVT test"

replace round=0 if round==45
replace round=1 if round==78
label define round 0 "Before" 1 "After"
label values round round
format %10.0g round
label var round "After treatment"

egen RCHILDID =concat(CHILDID sib)
egen nRCHILDID=group(RCHILDID)
egen cid=group(CHILDID)

* Defining as a panel

sort CHILDID sib
egen pid=group(CHILDID sib)

xtset pid round

***************************************************************************
*                                                                         *
*                                                                         *
*              IV. CREATION OF KEY VARIABLES (part 2)                     *
*                                                                         *
*                                                                         *
***************************************************************************

* Months of exposure
gen months_exposed=date_int-datenrolljuntos
replace months_exposed=0 if months_exposed<0   
replace months_exposed=0 if rec_juntos==0
gen junk=months_exposed/12
gen years_exposed=round(junk)
replace years_exposed=0 if rec_juntos==0
tab years_exposed round if sib==0
tab years_exposed round if sib==1

drop junk 
gen junk=years_exposed
replace junk=0 if round==1
bys CHILDID sib: egen junk2=max(junk)
replace junk2=0 if sib==0
replace junk2=0 if round==0
tab junk2 round if sib==0
tab junk2 round if sib==1
rename junk2 years_exposed2
label var years_exposed2 "Years exposed to JUNTOS in round 0"

* Age of months during the first month ox exposition to JUNTOS
gen agemonths_firstT=datenrolljuntos- monthdob
replace agemonths_firstT=0 if agemonths_firstT<0 & agemonths_firstT!=.  
label var agemonths_firstT "Age in months during the first month of exposition to JUNTOS"

* Families enrolled to JUNTOS in 2005 and 2006 (not to be considered)
gen baselinejuntos=1
replace baselinejuntos=0 if yenrolljuntos==2005 | yenrolljuntos==2006

* Age in semesters
gen semester4=(age_months<=24) & round==0
gen semester5=(age_months>24 & age_months<=30) & round==0
gen semester6=(age_months>30 & age_months<=36) & round==0
gen semester7=(age_months>36 & age_months<=42) & round==0
gen semester8=(age_months>42 & age_months<=48) & round==0
gen semester9=(age_months>48 & age_months<=54) & round==0
gen semester10=(age_months>54 & age_months<=60) & round==0
gen semester11=(age_months>60 & age_months<=66) & round==0
gen semester12=(age_months>66 & age_months<=72) & round==0
gen semester13=(age_months>72 & age_months<=78) & round==0
gen semester14=(age_months>78 & age_months<=92) & round==0

* Groups of Juntos' families
gen juntosgroup=. 
replace juntosgroup=1 if juntosuntilR3==1 | juntosuntilR3==2
replace juntosgroup=2 if juntosuntilR3==0 & (juntosuntilR4==1 | juntosuntilR4==2)
replace juntosgroup=3 if juntosuntilR3==0 & juntosuntilR4==0
label var juntosgroup "Type of Juntos's treatment R2-R3" 
label define juntosgroup 1 "Treated R2-R3" 2 "Treated R3-R4" 3 "Never treated" 
label values juntosgroup juntosgroup 

* Siblings first exposed to JUNTOS at age 5 or above are not considered in the sample
gen years03=1
replace years03=0 if sib==1 & age_enrollment==5

tab age_enrollment if sib==1 & juntosgroup==1 & years03==1 & round==0 & baselinejuntos==1

* Create a dummy equal 1 if child recieves Juntos in R2-R3 and is observed after treatment
gen aftjuntosR3=rec_juntos*round
label var aftjuntosR3 "Child receives Juntos in R3, observed after treatment"

* Family recieves Juntos in R3-R4
gen juntosr4=.
replace juntosr4=0 if juntosuntilR4==0 & juntosuntilR3==0  
replace juntosr4=1 if juntosuntilR3==0 & (juntosuntilR4==1 | juntosuntilR4==2)
label var juntosr4 "Family recieves Juntos in R4"

* Create a dummy equal 1 if child recieves Juntos in R3-R4 and is observed after treatment
gen aftjuntosR4=juntosr4*round
label var aftjuntosR4 "Child receives Juntos in R4, observed after treatment"

* Create a dummy equal 1 if child recieves Juntos & is observed after treatment & is a younger sibling
gen aftjuntosR3sib=rec_juntos*round*sib
label var aftjuntosR3sib "Child receives Juntos, observed after treatment, younger siblings"

label var zhfaR1 "Index' height-for-age z score, round 1"

***************************************************************************
*                                                                         *
*                                                                         *
*                     V. REGRESSION ANALYSIS                              *
*                                                                         *
*                                                                         *
***************************************************************************                                                                        *

* Defining globals

global household "MUMED CETH wiR1"
global child "female i.age"
global child2 "female i.age i.ppvtlang"
global district "prom_nbi"
global cluster "CHILDID"
global clusterid "ubigeoR1"
global condition "(quintil==1 | quintil==2)"
global sample_j1 "sample==1"
global sample_j2 "cogsample==1"
global datejuntos "baselinejuntos==1"
global criticalperiod "years03==1"
global cogcontrol "i.ppvtlang"
global treatmentr3      "rec_juntos round                                     aftjuntosR3"
global fetreatmentr3    "           round                                     aftjuntosR3"
global treatmentr3s     "rec_juntos round                                     aftjuntosR3"
global fetreatmentr3s   "           round                                     aftjuntosR3"
global treatmentr3s_all "rec_juntos round                                     aftjuntosR3"
global treatmentr4      "juntosr4   round aftjuntosR4"
global fetreatmentr4    "           round aftjuntosR4"

* Defining sample for nutrition
keep if $criticalperiod 
xi: areg zhfa $household $child $district $treatmentr3 if $condition & baselinejuntos, abs($cluster)
gen junksample=e(sample)

duplicates tag CHILDID if junksample==1, gen(rep)
tab rep
gen sample=0
replace sample=1 if rep==3

tab rec_juntos if rep==3 & round==0 & sib==0 & (juntosgroup==1 | juntosgroup==3)

drop junksample rep

* Defining sample for cognitive outcomes

// Those children who took the PPVT in Spanish
tempvar junkppvtlang0
gen `junkppvtlang0'=ppvtlang if round==0
tempvar ppvtlang0
bys pid: egen `ppvtlang0'=min(`junkppvtlang0')

tempvar junkppvtlang1
gen `junkppvtlang1'=ppvtlang if round==1
tempvar ppvtlang1
bys pid: egen `ppvtlang1'=min(`junkppvtlang1')

gen bothspanishppvt=(`ppvtlang0'==4 & `ppvtlang1'==4)
replace bothspanishppvt=. if PPVT==.
label var bothspanishppvt "Language used by child during PPVT test is Spanish in both rounds"

// Final sample
reg std_PPVT zhfa $household $child $district $treatmentr3 if $condition & baselinejuntos, abs($cluster)
gen junksample=e(sample)

duplicates tag CHILDID sib if junksample==1, gen(rep) 
tab rep
gen cogsample=0
replace cogsample=1 if rep==1
replace cogsample=0 if cogsample==1 & bothspanishppvt==0
drop junksample rep

gen juntosgroup1=0
replace juntosgroup1=1 if juntosgroup==1


*******************************************************
*********** ESTIMATIONS WITHOUT BASELINE **************
*******************************************************

/* In this section, we compare results at the age of 7-8 yrs bewteen siblings
(treated early) and index children (late treated). We use cluster-fixed effects,
household-fixed effects and YOB fixed effects. These results are similar to differents 
in differences */  

xtset nRCHILDID round
drop if round==0
gen sibxjuntos=sib*rec_juntos
label var sibxjuntos "Child is sibling and recieves JUNTOS" 
label var female "Child is female"
xtset cid sib

global child "i.ybirth female i.age"
global child2 "i.ybirth female i.age i.ppvtlang"

global household "MUMED wiR1 HHSIZER1 hh_agricR1"

// Only YOB fix effects + Household fix effects

xi: areg stunted    sib sibxjuntos $child if $sample_j1 &                      (juntosgroup==1 | juntosgroup==3), cluster($cluster) abs(CHILDID) 
estimates store stunted_nobasel_hh

xi: areg estunted   sib sibxjuntos $child if $sample_j1 &                      (juntosgroup==1 | juntosgroup==3), cluster($cluster) abs(CHILDID) 
estimates store exstunted_nobasel_hh

xi: areg zhfa       sib sibxjuntos $child if $sample_j1 &                      (juntosgroup==1 | juntosgroup==3), cluster($cluster) abs(CHILDID) 
estimates store haz_nobasel_hh

xi: areg std_PPVT   sib sibxjuntos $child2 if $sample_j2 &                      (juntosgroup==1 | juntosgroup==3), cluster($cluster) abs(CHILDID) 
estimates store ppvt_nobasel_hh

xml_tab stunted_nobasel* exstunted_nobasel* haz_nobasel* ppvt_nobasel*, replace sheet(tableB4) save("`rutaf'\tableB4_appendixB.xls") title("Effects of first expansion of Juntos Program on our outcomes, double difference at 7-8 age") below stats(N r2 r2_a)

